# # -*- coding: utf-8 -*-
# NOTE(review): this entire spider is commented out (dead code). Either restore it or
# delete the file from the repo — keeping it commented risks silent drift from the live
# codebase. If restored, the hard-coded proxy (L34) and session cookies (L35-38) are
# stale credentials and must be replaced with a managed proxy/cookie source.
# import random
# import scrapy
# from scrapy import Request
# from zc_core.util.http_util import retry_request
# from zc_core.dao.sku_pool_dao import SkuPoolDao
# from zc_core.dao.batch_dao import BatchDao
# from scrapy.exceptions import IgnoreRequest
# from zc_core.util.done_filter import DoneFilter
# from cmcc.rules import *
#
#
# class ProductFullSpider(BaseSpider):
#     name = 'product_full'
#     custom_settings = {
#         'CONCURRENT_REQUESTS': 12,
#         # 'DOWNLOAD_DELAY': 1,
#         'CONCURRENT_REQUESTS_PER_DOMAIN': 12,
#         'CONCURRENT_REQUESTS_PER_IP': 12,
#     }
#     item_url = 'http://b2bjoy.10086.cn/oscp/goods/productBrowse/goodsDetail.html?sn={}'
#
#     def __init__(self, batchNo=None, *args, **kwargs):
#         super(ProductFullSpider, self).__init__(*args, **kwargs)
#         if not batchNo:
#             self.batch_no = time_to_batch_no(datetime.now())
#         else:
#             self.batch_no = int(batchNo)
#         # Create the batch record
#         BatchDao().create_batch(self.batch_no)
#         # Avoid re-collecting items already done in this batch
#         self.done_filter = DoneFilter(self.batch_no)
#
#         self.proxy = 'http://114.220.15.88:4236'
#         self.cookies = {
#             'JSESSIONID': '5715de56e4715cb646c9cdab14fa',
#             'centralSessionId': '_userId-c1db073ffef143c2822e8dfb024c2192_2f84dc6bfe744213bfbcaa06fe37ad71',
#         }
#
#     def start_requests(self):
#         if not self.cookies:
#             self.logger.error('init cookie failed...')
#             return
#         self.logger.info('init cookie: %s', self.cookies)
#
#         settings = get_project_settings()
#         pool_list = SkuPoolDao().get_sku_pool_list(fields={'_id': 1, 'offlineTime': 1})
#         self.logger.info('全量：%s' % (len(pool_list)))
#         random.shuffle(pool_list)
#         for sku in pool_list:
#             sku_id = sku.get('_id')
#             offline_time = sku.get('offlineTime', 0)
#             if offline_time > settings.get('MAX_OFFLINE_TIME', 2):
#                 self.logger.info('忽略: [%s][%s]', sku_id, offline_time)
#                 continue
#             # Skip SKUs already collected (unless FORCE_RECOVER is set)
#             if self.done_filter.contains(sku_id) and not settings.get('FORCE_RECOVER', False):
#                 self.logger.info('已采: [%s]', sku_id)
#                 continue
#
#             url = self.item_url.format(sku_id)
#             yield Request(
#                 url=url,
#                 cookies=self.cookies,
#                 callback=self.parse_product_data,
#                 errback=self.error_back,
#                 meta={
#                     'proxy': self.proxy,
#                     'reqType': 'item',
#                     'batchNo': self.batch_no,
#                     'skuId': sku_id,
#                 },
#                 headers={
#                     'Host': 'b2bjoy.10086.cn',
#                     'Proxy-Connection': 'keep-alive',
#                     'Cache-Control': 'max-age=0',
#                     'Upgrade-Insecure-Requests': '1',
#                     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
#                     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
#                     'Referer': 'http://b2bjoy.10086.cn/oscp/home/main.html',
#                     'Accept-Encoding': 'gzip, deflate',
#                     'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,ja;q=0.7',
#                 },
#                 priority=250,
#                 dont_filter=True
#             )
#
#     # Handle the item detail response (ItemData)
#     def parse_product_data(self, response):
#         meta = response.meta
#
#         # Product
#         data = parse_product_data(response)
#         if data:
#             self.logger.info('商品: [%s]' % data.get('skuId'))
#             yield data
#         else:
#             self.logger.info('下架: [%s]' % meta.get('skuId'))
#

